#!/usr/bin/env python
# -*- coding:utf-8 -*-

import urllib2
import json
from lxml import etree

# username //div[contains(@id,"qiushi_tag_")]//h2
# content  //div[contains(@id,"qiushi_tag_")]/a/div[@class="content"]/span[1]
# votes    //div[contains(@id,"qiushi_tag_")]/div[@class="stats"]/span[@class="stats-vote"]/i
# comments //div[contains(@id,"qiushi_tag_")]/div[@class="stats"]/span[@class="stats-comments"]/a/i[@class="number"]


# Target: first page of the qiushibaike "8hr" hot list.
url = "https://www.qiushibaike.com/8hr/page/1/"
# Spoof a desktop browser User-Agent; the site rejects the default urllib2 one.
headers = {"User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
request = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(request)
try:
    html = response.read()
finally:
    # Always release the HTTP connection, even if read() fails.
    response.close()

text = etree.HTML(html)
# Root element of every joke entry on the page.
node_list = text.xpath('//div[contains(@id,"qiushi_tag_")]')


def _first_text(node, path):
    """Return the .text of the first match of *path* under *node*.

    Returns '' when the path matches nothing, so one malformed entry
    (e.g. an ad or video card missing a sub-element) cannot crash the
    whole run with an IndexError.
    """
    found = node.xpath(path)
    return found[0].text if found else ""


# Open the output file once in append mode, instead of reopening it
# for every single entry inside the loop.
with open('qiushi.json', "a") as f:
    for node in node_list:
        items = {
            # author name
            "username": _first_text(node, './/h2'),
            # joke body text
            "content": _first_text(node, './a/div[@class="content"]/span[1]'),
            # upvote count
            "vote": _first_text(node, './div[@class="stats"]/span[@class="stats-vote"]/i'),
            # comment count
            "comment": _first_text(node, './div[@class="stats"]/span[@class="stats-comments"]/a/i[@class="number"]'),
        }
        # ensure_ascii=False keeps the Chinese text readable; encode to
        # UTF-8 bytes for the byte-oriented Python 2 file object.
        f.write(json.dumps(items, ensure_ascii=False).encode('utf-8') + "\n")
